// baljsn_decoder.h                                                   -*-C++-*-
#ifndef INCLUDED_BALJSN_DECODER
#define INCLUDED_BALJSN_DECODER

#include <bsls_ident.h>
BSLS_IDENT("$Id: $")

//@PURPOSE: Provide a JSON decoder for `bdeat` compatible types.
//
//@CLASSES:
//  baljsn::Decoder: JSON decoder for `bdeat`-compliant types
//
//@SEE_ALSO: baljsn_decoderoptions, baljsn_decoderoptionsutil,
//           baljsn_encoder, baljsn_parserutil, baljsn_parser
//
//@DESCRIPTION: This component provides a class, `baljsn::Decoder`, for
// decoding value-semantic objects in the JSON format.  In particular, the
// `class` contains a parameterized `decode` function that decodes an object
// from a specified stream.  There are two overloaded versions of this
// function:
//
// * one that reads from a `bsl::streambuf`
// * one that reads from a `bsl::istream`
//
// This component can be used with types that support the `bdeat` framework
// (see the `bdeat` package for details), which is a compile-time interface for
// manipulating struct-like and union-like objects.  In particular, types
// generated by the `bas_codegen.pl` tool, and other dynamic types, can be
// decoded using this `class`.  The `decode` function can be invoked on any
// object that satisfies the requirements of a sequence, choice, or array
// object as defined in the `bdlat_sequencefunctions`, `bdlat_choicefunctions`,
// and `bdlat_arrayfunctions` components.
//
// Although the JSON format is easy to read and write and is very useful for
// debugging, it is relatively expensive to encode and decode and relatively
// bulky to transmit.  It is more efficient to use a binary encoding (such as
// BER) if the encoding format is under your control (see `balber_berdecoder`).
//
// Refer to the details of the JSON encoding format supported by this decoder
// in the package documentation file (doc/baljsn.txt).
//
///`validateInputIsUtf8` Option
///----------------------------
// The `baljsn::DecoderOptions` parameter of the `decode` function has a
// configuration option named `validateInputIsUtf8`.  If this option is `true`,
// the `decode` function will succeed only if the encoding of the JSON data is
// UTF-8, which the JSON specification requires.  If the option is `false`,
// `decode` will not validate that the encoding of the JSON data is UTF-8, and
// may succeed even if the data does not satisfy the UTF-8 validity requirement
// of the JSON specification.  This option primarily affects the acceptance of
// string literals, which are the parts of JSON documents that may have
// rational justification for having non-UTF-8, and therefore invalid, content.
//
// Ideally, users *should* set `validateInputIsUtf8` to `true`.  However, some
// legacy applications currently might be trafficking in JSON that contains
// non-UTF-8 with no adverse effects to their clients.  Consequently, this
// option is `false` by default to maintain backward compatibility.
//
///Strict Conformance
///------------------
// The `baljsn::Decoder` class allows several convenient variances from the
// JSON grammar as described in RFC8259 (see
// https://www.rfc-editor.org/rfc/rfc8259).  If strict conformance is needed,
// users should use the `decode` overloads that accept a
// `baljsn::DecoderOptions` object and set the following attributes to the
// values shown below:
// ```
// validateInputIsUtf8()             == true;
// allowConsecutiveSeparators()      == false;
// allowFormFeedAsWhitespace()       == false;
// allowUnescapedControlCharacters() == false;
// ```
// See also {`bdljsn_tokenizer`|Strict Conformance}.
//
///Usage
///-----
// This section illustrates intended use of this component.
//
///Example 1: Decoding into a `bas_codegen.pl`-Generated Type from JSON Data
///-------------------------------------------------------------------------
// Consider that we want to exchange an employee's information between two
// processes.  To allow this information exchange we will define the XML schema
// representation for that class, use `bas_codegen.pl` to create the `Employee`
// `class` for storing that information, and decode into that object using the
// `baljsn` decoder.
//
// First, we will define the XML schema inside a file called `employee.xsd`:
// ```
// <?xml version='1.0' encoding='UTF-8'?>
// <xs:schema xmlns:xs='http://www.w3.org/2001/XMLSchema'
//            xmlns:test='http://bloomberg.com/schemas/test'
//            targetNamespace='http://bloomberg.com/schemas/test'
//            elementFormDefault='unqualified'>
//
//     <xs:complexType name='Address'>
//         <xs:sequence>
//             <xs:element name='street' type='xs:string'/>
//             <xs:element name='city'   type='xs:string'/>
//             <xs:element name='state'  type='xs:string'/>
//         </xs:sequence>
//     </xs:complexType>
//
//     <xs:complexType name='Employee'>
//         <xs:sequence>
//             <xs:element name='name'        type='xs:string'/>
//             <xs:element name='homeAddress' type='test:Address'/>
//             <xs:element name='age'         type='xs:int'/>
//         </xs:sequence>
//     </xs:complexType>
//
//     <xs:element name='Employee' type='test:Employee'/>
//
// </xs:schema>
// ```
// Then, we will use the `bas_codegen.pl` tool to generate the C++ classes for
// this schema.  The following command will generate the header and
// implementation files for all the classes in the `test_messages`
// components in the current directory:
// ```
// $ bas_codegen.pl -m msg -p test employee.xsd
// ```
// Next, we will create a `test::Employee` object:
// ```
// test::Employee employee;
// ```
// Then, we will create a `baljsn::Decoder` object:
// ```
// baljsn::Decoder decoder;
// ```
// Next, we will specify the input data provided to the decoder:
// ```
// const char INPUT[] = "{\"name\":\"Bob\",\"homeAddress\":{\"street\":"
//                      "\"Lexington Ave\",\"city\":\"New York City\","
//                      "\"state\":\"New York\"},\"age\":21}";
//
// bsl::istringstream is(INPUT);
// ```
// Now, we will decode this object using the `decode` function of the baljsn
// decoder by providing it a `baljsn::DecoderOptions` object.  The decoder
// options allow us to specify that unknown elements should *not* be skipped.
// Setting this option to `false` will result in the decoder returning an error
// on encountering an unknown element:
// ```
// baljsn::DecoderOptions options;
// options.setSkipUnknownElements(false);
//
// const int rc = decoder.decode(is, &employee, options);
// assert(!rc);
// assert(is);
// ```
// Finally, we will verify that the decoded object is as expected:
// ```
// assert("Bob"           == employee.name());
// assert("Lexington Ave" == employee.homeAddress().street());
// assert("New York City" == employee.homeAddress().city());
// assert("New York"      == employee.homeAddress().state());
// assert(21              == employee.age());
// ```

#include <balscm_version.h>

#include <baljsn_decoderoptions.h>
#include <baljsn_parserutil.h>
#include <baljsn_tokenizer.h>

#include <bdlar_isref.h>
#include <bdlar_refutil.h>

#include <bdlat_attributeinfo.h>
#include <bdlat_choicefunctions.h>
#include <bdlat_customizedtypefunctions.h>
#include <bdlat_enumfunctions.h>
#include <bdlat_enumutil.h>
#include <bdlat_formattingmode.h>
#include <bdlat_selectioninfo.h>
#include <bdlat_sequencefunctions.h>
#include <bdlat_typecategory.h>
#include <bdlat_valuetypefunctions.h>

#include <bdlb_printmethods.h>

#include <bdlma_localsequentialallocator.h>

#include <bsla_fallthrough.h>

#include <bslmf_assert.h>
#include <bslmf_isintegral.h>

#include <bsls_assert.h>
#include <bsls_types.h>

#include <bsl_iostream.h>
#include <bsl_sstream.h>
#include <bsl_streambuf.h>
#include <bsl_string.h>
#include <bsl_string_view.h>
#include <bsl_unordered_set.h>

namespace BloombergLP {
namespace baljsn {

                               // =============
                               // class Decoder
                               // =============

/// This class provides a mechanism for decoding JSON data into
/// value-semantic objects.  The `decode` methods are function templates
/// that will decode any object that meets the requirements of a sequence,
/// choice, or array object as defined in the `bdlat_sequencefunctions`,
/// `bdlat_choicefunctions`, and `bdlat_arrayfunctions` components
/// respectively.  These generic frameworks provide a common compile-time
/// interface for manipulating struct-like and union-like objects.  In
/// particular, the types generated by `bas_codegen.pl` provide the
/// necessary interface and can be decoded using this component.
class Decoder {

    // DATA
    bsl::ostringstream  d_logStream;            // stream to record errors
    Tokenizer           d_tokenizer;            // JSON tokenizer
    bsl::string         d_elementName;          // current element name
    int                 d_currentDepth;         // current decoding depth
    int                 d_maxDepth;             // max decoding depth
    bool                d_skipUnknownElements;  // skip unknown elements flag
    int                 d_numUnknownElementsSkipped;  // number of unknown
                                                      // elements skipped
    bool                d_allowMissingRequiredAttributes; // allow missing
                                                          // non-optional attrs

    // FRIENDS
    friend struct Decoder_DecodeImpProxy;
    friend struct Decoder_ElementVisitor;

    // PRIVATE TYPES

    /// Functor used when decoding customized types; defined with the inline
    /// definitions further down in this header.
    struct CustomizedManipulator;

    // PRIVATE MANIPULATORS

    /// Decode into the specified `value`, of a (template parameter) `TYPE`
    /// corresponding to the specified `bdeat` `category`, the JSON data
    /// currently referred to by the tokenizer owned by this object, using
    /// the specified formatting `mode`.  Return 0 on success and a non-zero
    /// value otherwise.  The behavior is undefined unless `value`
    /// corresponds to the specified `bdeat` category and `mode` is a valid
    /// formatting mode as specified in `bdlat_FormattingMode`.  Note that
    /// `ANY_CATEGORY` shall be a tag-type defined in `bdlat_TypeCategory`,
    /// and that the category argument of each overload serves only to
    /// select that overload.
    template <class TYPE>
    int decodeImp(TYPE *value, int mode, bdlat_TypeCategory::DynamicType);
    template <class TYPE>
    int decodeImp(TYPE *value, int mode, bdlat_TypeCategory::Sequence);
    template <class TYPE>
    int decodeImp(TYPE *value, int mode, bdlat_TypeCategory::Choice);
    template <class TYPE>
    int decodeImp(TYPE *value, int mode, bdlat_TypeCategory::Enumeration);
    template <class TYPE>
    int decodeImp(TYPE *value, int mode, bdlat_TypeCategory::CustomizedType);
    template <class TYPE>
    int decodeImp(TYPE *value, int mode, bdlat_TypeCategory::Simple);
    template <class TYPE>
    int decodeImp(TYPE *value, int mode, bdlat_TypeCategory::Array);
    template <class TYPE>
    int decodeImp(TYPE *value, int mode, bdlat_TypeCategory::NullableValue);
    int decodeImp(bsl::vector<char>         *value,
                  int                        mode,
                  bdlat_TypeCategory::Array);
    template <class TYPE, class ANY_CATEGORY>
    int decodeImp(TYPE *value, ANY_CATEGORY category);

    /// Log the latest tokenizer error to `d_logStream`.  If the tokenizer
    /// did not have an error, log the specified `alternateString`.  Return
    /// a reference to `d_logStream`, allowing the caller to append further
    /// context to the same message.
    bsl::ostream& logTokenizerError(const char *alternateString);

    /// Skip the unknown element specified by `elementName` by discarding
    /// all the data associated with it and advancing the parser to the next
    /// element.  Return 0 on success and a non-zero value otherwise.
    int skipUnknownElement(const bsl::string_view& elementName);

  private:
    // NOT IMPLEMENTED

    // Copy construction and copy assignment are declared private and left
    // unimplemented so that objects of this class cannot be copied.
    Decoder(const Decoder&);
    Decoder& operator=(const Decoder&);

  public:
    // CREATORS

    /// Construct a decoder object using the optionally specified
    /// `basicAllocator`.  If `basicAllocator` is 0, the default allocator
    /// is used.
    explicit Decoder(bslma::Allocator *basicAllocator = 0);

    // MANIPULATORS

    /// Decode into the specified `value`, of a (template parameter) `TYPE`,
    /// the JSON data read from the specified `streamBuf` and using the
    /// specified `options`.  Specifying a nullptr `options` is equivalent
    /// to passing a default-constructed DecoderOptions in `options`.
    /// `TYPE` shall be a `bdeat`-compatible sequence, choice, or array
    /// type, or a `bdeat`-compatible dynamic type referring to one of those
    /// types.  Return 0 on success, and a non-zero value otherwise.  Note
    /// that this operation internally buffers input from `streamBuf`, and
    /// if decoding is successful, will attempt to update the input position
    /// of `streamBuf` to the last unprocessed byte.
    template <class TYPE>
    int decode(bsl::streambuf        *streamBuf,
               TYPE                  *value,
               const DecoderOptions&  options);
    template <class TYPE>
    int decode(bsl::streambuf        *streamBuf,
               TYPE                  *value,
               const DecoderOptions  *options);

    /// Decode into the specified `value`, of a (template parameter) `TYPE`,
    /// the JSON data read from the specified `stream` and using the
    /// specified `options`.  `TYPE` shall be a `bdeat`-compatible sequence,
    /// choice, or array type, or a `bdeat`-compatible dynamic type
    /// referring to one of those types.  Specifying a nullptr `options` is
    /// equivalent to passing a default-constructed DecoderOptions in
    /// `options`.  Return 0 on success, and a non-zero value otherwise.
    /// Note that this operation internally buffers input from `stream`, and
    /// if decoding is successful, will attempt to update the input position
    /// of `stream` to the last unprocessed byte.
    template <class TYPE>
    int decode(bsl::istream&          stream,
               TYPE                  *value,
               const DecoderOptions&  options);
    template <class TYPE>
    int decode(bsl::istream&          stream,
               TYPE                  *value,
               const DecoderOptions  *options);

    /// Decode an object of (template parameter) `TYPE` from the specified
    /// `streamBuf` and load the result into the specified modifiable `value`.
    /// Return 0 on success, and a non-zero value otherwise.
    ///
    /// @DEPRECATED: Use the `decode` function passed a reference to a
    /// non-modifiable `DecoderOptions` object instead.
    template <class TYPE>
    int decode(bsl::streambuf *streamBuf, TYPE *value);

    /// Decode an object of (template parameter) `TYPE` from the specified
    /// `stream` and load the result into the specified modifiable `value`.
    /// Return 0 on success, and a non-zero value otherwise.  Note that
    /// `stream` will be invalidated if the decoding fails.
    ///
    /// @DEPRECATED: Use the `decode` function passed a reference to a
    /// non-modifiable `DecoderOptions` object instead.
    template <class TYPE>
    int decode(bsl::istream& stream, TYPE *value);

    /// Decode into the specified `value`, of a (template parameter) `TYPE`,
    /// the JSON data read from the specified `streamBuf` and using the
    /// specified `options`.  Specifying a nullptr `options` is equivalent to
    /// passing a default-constructed `DecoderOptions` in `options`.  `TYPE`
    /// shall be a `bdlat`-compatible sequence, choice, or array type, or a
    /// `bdlat`-compatible dynamic type referring to one of those types.
    /// Return 0 on success, and a non-zero value otherwise.  Note that this
    /// function behaves identically to `decode`, but does not instantiate any
    /// templates at compile time at the expense of being slightly slower at
    /// runtime; see the `baljsn` package documentation for more details.
    template <class TYPE>
    int decodeAny(bsl::streambuf        *streamBuf,
                  TYPE                  *value,
                  const DecoderOptions&  options);
    template <class TYPE>
    int decodeAny(bsl::streambuf        *streamBuf,
                  TYPE                  *value,
                  const DecoderOptions  *options = 0);
    int decodeAny(bsl::streambuf        *streamBuf,
                  bdlar::AnyRef         *value,
                  const DecoderOptions&  options);

    /// Decode into the specified `value`, of a (template parameter) `TYPE`,
    /// the JSON data read from the specified `stream` and using the specified
    /// `options`.  `TYPE` shall be a `bdlat`-compatible sequence, choice, or
    /// array type, or a `bdlat`-compatible dynamic type referring to one of
    /// those types.  Specifying a nullptr `options` is equivalent to passing a
    /// default-constructed `DecoderOptions` in `options`.  Return 0 on
    /// success, and a non-zero value otherwise.  Note that this function
    /// behaves identically to `decode`, but does not instantiate any templates
    /// at compile time at the expense of being slightly slower at runtime; see
    /// the `baljsn` package documentation for more details.
    template <class TYPE>
    int decodeAny(bsl::istream&          stream,
                  TYPE                  *value,
                  const DecoderOptions&  options);
    template <class TYPE>
    int decodeAny(bsl::istream&          stream,
                  TYPE                  *value,
                  const DecoderOptions  *options = 0);
    int decodeAny(bsl::istream&          stream,
                  bdlar::AnyRef         *value,
                  const DecoderOptions&  options);

    // ACCESSORS

    /// Return a string containing any error, warning, or trace messages that
    /// were logged during the last call to the `decode` method.  The log is
    /// reset each time `decode` is called.
    bsl::string loggedMessages() const;

    /// Return the number of unknown elements that were skipped during the
    /// previous decoding operation.  Note that unknown elements are skipped
    /// only if `options.skipUnknownElements() == true`.
    int numUnknownElementsSkipped() const;
};

                       // =============================
                       // struct Decoder_ElementVisitor
                       // =============================

/// This `class` implements a visitor for decoding elements within a sequence,
/// choice, or array type.  This is a component-private class and should not be
/// used outside of this component.  Note that the operators provided in this
/// `class` match the function signatures required of visitors decoding into
/// elements of compatible types.
struct Decoder_ElementVisitor {

    // DATA

    // Note that objects of this type are brace-initialized positionally as
    // `{ decoder, mode }`, so the order of these members must not change.
    Decoder *d_decoder_p;  // decoder (held, not owned)
    int      d_mode;       // formatting mode

    // CREATORS

    // Creators have been omitted to allow simple static initialization of this
    // struct.

    // MANIPULATORS

    /// Decode into the specified `value` the data in the JSON format.
    /// Return 0 on success and a non-zero value otherwise.
    template <class TYPE>
    int operator()(TYPE *value);

    /// Decode into the specified `value` using the specified `info` the data
    /// in the JSON format.  Return 0 on success and a non-zero value
    /// otherwise.
    template <class TYPE, class INFO>
    int operator()(TYPE *value, const INFO& info);
};

                       // =============================
                       // struct Decoder_DecodeImpProxy
                       // =============================

/// This class provides a functor that dispatches the appropriate `decodeImp`
/// method for a `bdeat` Dynamic type.  Note that the operators provided in
/// this `class` match the function signatures required of visitors decoding
/// into compatible types.
struct Decoder_DecodeImpProxy {

    // DATA

    // Note that objects of this type are brace-initialized positionally as
    // `{ decoder, mode }`, so the order of these members must not change.
    Decoder *d_decoder_p;  // decoder (held, not owned)
    int      d_mode;       // formatting mode

    // CREATORS

    // Creators have been omitted to allow simple static initialization of this
    // struct.

    // MANIPULATORS

    /// Overload taking a `bslmf::Nil` tag in place of a `bdeat` category.
    /// NOTE(review): presumably invoked when the object has no usable
    /// category -- confirm against the definition of this operator.
    template <class TYPE>
    int operator()(TYPE *, bslmf::Nil);

    /// Decode into the specified `object` of the specified `bdeat` `category`
    /// from the data in the JSON format.  Return 0 on success and a non-zero
    /// value otherwise.
    template <class TYPE, class ANY_CATEGORY>
    int operator()(TYPE *object, ANY_CATEGORY category);
};

                       // ===================================
                       // struct Decoder_RequiredAttrsVisitor
                       // ===================================

/// This class provides a functor that finds and remembers all non-optional
/// sequence attributes.
struct Decoder_RequiredAttrsVisitor {
    // PUBLIC DATA

    // Note that objects of this type are brace-initialized positionally, so
    // the order of these members must not change.
    bsl::unordered_set<bsl::string> *d_requiredAttributes_p;
                                         // set into which the names of the
                                         // non-optional attributes are
                                         // collected (held, not owned)
    bool                             d_usesDefaultValueFlag;
                                         // value of
                                         // `bdlat_UsesDefaultValueFlag` for
                                         // the sequence type being visited

    // MANIPULATORS

    /// Visit the attribute referred to by the specified `value` and described
    /// by the specified `info`.
    /// NOTE(review): presumably records the attribute in
    /// `*d_requiredAttributes_p` when it is non-optional -- confirm against
    /// the definition of this operator.
    template <class TYPE, class INFO>
    int operator()(TYPE *value, const INFO& info);
};

// ============================================================================
//                            INLINE DEFINITIONS
// ============================================================================

                               // -------------
                               // class Decoder
                               // -------------

// PRIVATE TYPES
/// This functor parses the text held in `d_dataValue` into an object of the
/// base type of a customized type, reporting failures to `d_logStream`.
struct Decoder::CustomizedManipulator {
    // PUBLIC DATA
    bsl::ostringstream& d_logStream;      // stream receiving error messages
    bslstl::StringRef   d_dataValue;      // text to be parsed
    bool                d_convertCalled;  // set once a base value was parsed

    // MANIPULATORS

    /// Parse `d_dataValue` into the specified `value`.  Return 0 and set
    /// `d_convertCalled` on success; otherwise log an error to `d_logStream`
    /// and return -1.  This overload is selected for base types that are not
    /// `bdlar` ref types.
    template <class t_BASE_TYPE>
    typename bsl::enable_if<!bdlar::IsRef<t_BASE_TYPE>::value, int>::type
    operator()(t_BASE_TYPE *value) {
        int status = ParserUtil::getValue(value, d_dataValue);

        // For integral base types, we also accept a quoted representation
        // (DRQS 166048981): retry once with the quotes removed.
        if (0 != status && bsl::is_integral<t_BASE_TYPE>::value) {
            if (ParserUtil::stripQuotes(&d_dataValue)) {
                status = ParserUtil::getValue(value, d_dataValue);
            }
        }

        if (0 == status) {
            d_convertCalled = true;
            return 0;                                                 // RETURN
        }

        d_logStream << "Could not decode Enum Customized, "
                    << "value not allowed \"" << d_dataValue << "\"\n";
        return -1;
    }

    /// Stub for the `bdlar` ref types; always return -1.
    template <class t_REF>
    typename bsl::enable_if<bdlar::IsRef<t_REF>::value, int>::type
    operator()(t_REF *) {
        return -1;
    }
};

// PRIVATE MANIPULATORS
template <class TYPE>
inline
int Decoder::decodeImp(TYPE *value, int mode, bdlat_TypeCategory::DynamicType)
{
    Decoder_DecodeImpProxy proxy = { this, mode };
    return bdlat_TypeCategoryUtil::manipulateByCategory(value, proxy);
}

template <class TYPE>
int Decoder::decodeImp(TYPE *value, int mode, bdlat_TypeCategory::Sequence)
{
    if (bdlat_FormattingMode::e_UNTAGGED & mode) {
        // This is an anonymous element.  Do not read anything and instead
        // decode into the corresponding sub-element.

        if (bdlat_SequenceFunctions::hasAttribute(
                                   *value,
                                   d_elementName.data(),
                                   static_cast<int>(d_elementName.length()))) {
            Decoder_ElementVisitor visitor = { this, mode };

            if (0 != bdlat_SequenceFunctions::manipulateAttribute(
                                   value,
                                   visitor,
                                   d_elementName.data(),
                                   static_cast<int>(d_elementName.length()))) {
                d_logStream << "Could not decode sequence, error decoding "
                            << "element or bad element name '"
                            << d_elementName << "' \n";
                return -1;                                            // RETURN
            }
        }
        else {
            if (d_skipUnknownElements) {
                const int rc = skipUnknownElement(d_elementName);
                if (rc) {
                    d_logStream << "Error reading unknown element '"
                                << d_elementName << "' or after it\n";
                    return -1;                                        // RETURN
                }
            }
            else {
                d_logStream << "Unknown element '" << d_elementName
                            << "' found\n";
                return -1;                                            // RETURN
            }
        }
    }
    else {
        if (++d_currentDepth > d_maxDepth) {
            d_logStream << "Maximum allowed decoding depth reached: "
                        << d_currentDepth << "\n";
            return -1;                                                // RETURN
        }

        if (Tokenizer::e_START_OBJECT != d_tokenizer.tokenType()) {
            d_logStream << "Could not decode sequence, missing starting '{'\n";
            return -1;                                                // RETURN
        }

        int rc = d_tokenizer.advanceToNextToken();
        if (rc) {
            d_logStream << "Could not decode sequence, ";
            logTokenizerError("error") << " reading token after '{'\n";
            return -1;                                                // RETURN
        }

        bdlma::LocalSequentialAllocator<512> localAllocator;
        bsl::unordered_set<bsl::string> requiredAttributes(&localAllocator);
        if (!d_allowMissingRequiredAttributes) {
            // Collect a list of the non-optional attributes
            Decoder_RequiredAttrsVisitor visitor = {&requiredAttributes,
                       ::BloombergLP::bdlat_UsesDefaultValueFlag<TYPE>::value};
            bdlat_SequenceFunctions::manipulateAttributes(value, visitor);
        }

        while (Tokenizer::e_ELEMENT_NAME == d_tokenizer.tokenType()) {

            bslstl::StringRef elementName;
            rc = d_tokenizer.value(&elementName);
            if (rc) {
                d_logStream << "Error reading attribute name after '{'\n";
                return -1;                                            // RETURN
            }

            if (bdlat_SequenceFunctions::hasAttribute(
                                     *value,
                                     elementName.data(),
                                     static_cast<int>(elementName.length()))) {
                d_elementName = elementName;

                rc = d_tokenizer.advanceToNextToken();
                if (rc) {
                    logTokenizerError("Error") << " reading value for"
                                 << " attribute '" << d_elementName << "' \n";
                    return -1;                                        // RETURN
                }

                Decoder_ElementVisitor visitor = { this, mode };

                if (0 != bdlat_SequenceFunctions::manipulateAttribute(
                                   value,
                                   visitor,
                                   d_elementName.data(),
                                   static_cast<int>(d_elementName.length()))) {
                    d_logStream << "Could not decode sequence, error decoding "
                                << "element or bad element name '"
                                << d_elementName << "' \n";
                    return -1;                                        // RETURN
                }

                if (!requiredAttributes.empty()) {
                    requiredAttributes.erase(elementName);
                }
            }
            else {
                if (d_skipUnknownElements) {
                    rc = skipUnknownElement(elementName);
                    if (rc) {
                        d_logStream << "Error reading unknown element '"
                                    << elementName << "' or after it\n";
                        return -1;                                    // RETURN
                    }
                }
                else {
                    d_logStream << "Unknown element '"
                                << elementName << "' found\n";
                    return -1;                                        // RETURN
                }
            }

            rc = d_tokenizer.advanceToNextToken();
            if (rc) {
                d_logStream << "Could not decode sequence, ";
                logTokenizerError("error") << " reading token"
                            << " after value for attribute '"
                            << d_elementName << "' \n";
                return -1;                                            // RETURN
            }
        }

        if (Tokenizer::e_END_OBJECT != d_tokenizer.tokenType()) {
            d_logStream << "Could not decode sequence, "
                        << "missing terminator '}' or seperator ','\n";
            return -1;                                                // RETURN
        }

        if (!requiredAttributes.empty()) {
            // There are non-optional attributes that are not presented
            // in the decoded message (sequence).
            d_logStream << "Could not decode sequence, "
                        << "missing required attribute \""
                        << *requiredAttributes.begin() << "\"\n";
            return -1;                                                // RETURN
        }

        --d_currentDepth;
    }
    return 0;
}

template <class TYPE>
int Decoder::decodeImp(TYPE                       *value,
                       int                         mode,
                       bdlat_TypeCategory::Choice)
{
    if (bdlat_FormattingMode::e_UNTAGGED & mode) {
        // This is an anonymous element.  Do not read anything and instead
        // decode into the corresponding sub-element.

        bslstl::StringRef selectionName;
        selectionName.assign(d_elementName.begin(), d_elementName.end());

        if (bdlat_ChoiceFunctions::hasSelection(
                                   *value,
                                   selectionName.data(),
                                   static_cast<int>(selectionName.length()))) {
            if (0 != bdlat_ChoiceFunctions::makeSelection(
                                   value,
                                   selectionName.data(),
                                   static_cast<int>(selectionName.length()))) {
                d_logStream << "Could not decode choice, bad selection name '"
                            << selectionName << "' \n";
                return -1;                                            // RETURN
            }

            Decoder_ElementVisitor visitor = { this, mode };

            if (0 != bdlat_ChoiceFunctions::manipulateSelection(value,
                                                                visitor)) {
                d_logStream << "Could not decode choice, selection "
                            << "was not decoded\n";
                return -1;                                            // RETURN
            }
        }
        else {
            if (d_skipUnknownElements) {
                const int rc = skipUnknownElement(selectionName);
                if (rc) {
                    d_logStream << "Error reading unknown element '"
                                << selectionName << "' or after that "
                                << "element\n";
                    return -1;                                        // RETURN
                }
            }
            else {
                d_logStream << "Unknown element '"
                            << selectionName << "' found\n";
                return -1;                                            // RETURN
            }
        }
    }
    else {
        if (++d_currentDepth > d_maxDepth) {
            d_logStream << "Maximum allowed decoding depth reached: "
                        << d_currentDepth << "\n";
            return -1;                                                // RETURN
        }

        if (Tokenizer::e_START_OBJECT != d_tokenizer.tokenType()) {
            d_logStream << "Could not decode choice, missing starting {\n";
            return -1;                                                // RETURN
        }

        int rc = d_tokenizer.advanceToNextToken();
        if (rc) {
            d_logStream << "Could not decode choice, ";
            logTokenizerError("error") << " reading token after {\n";
            return -1;                                                // RETURN
        }

        if (Tokenizer::e_ELEMENT_NAME == d_tokenizer.tokenType()) {
            bslstl::StringRef selectionName;
            rc = d_tokenizer.value(&selectionName);
            if (rc) {
                d_logStream << "Error reading selection name after '{'\n";
                return -1;                                            // RETURN
            }

            if (bdlat_ChoiceFunctions::hasSelection(
                                   *value,
                                   selectionName.data(),
                                   static_cast<int>(selectionName.length()))) {
                if (0 != bdlat_ChoiceFunctions::makeSelection(
                                   value,
                                   selectionName.data(),
                                   static_cast<int>(selectionName.length()))) {
                    d_logStream << "Could not decode choice, bad selection "
                                << "name '" << selectionName << "' \n";
                    return -1;                                        // RETURN
                }

                rc = d_tokenizer.advanceToNextToken();
                if (rc) {
                    d_logStream << "Could not decode choice, ";
                    logTokenizerError("error") << " reading value \n";
                    return -1;                                        // RETURN
                }

                Decoder_ElementVisitor visitor = { this, mode };

                if (0 != bdlat_ChoiceFunctions::manipulateSelection(value,
                                                                    visitor)) {
                    d_logStream << "Could not decode choice, selection "
                                << "was not decoded\n";
                    return -1;                                        // RETURN
                }
            }
            else {
                if (d_skipUnknownElements) {
                    rc = skipUnknownElement(selectionName);
                    if (rc) {
                        d_logStream << "Error reading unknown element '"
                                    << selectionName << "' or after that "
                                    << "element\n";
                        return -1;                                    // RETURN
                    }
                }
                else {
                    d_logStream << "Unknown element '"
                                << selectionName << "' found\n";
                    return -1;                                        // RETURN
                }
            }

            rc = d_tokenizer.advanceToNextToken();
            if (rc) {
                d_logStream << "Could not decode choice, ";
                logTokenizerError("error") << " reading token after value for"
                                                               " selection \n";

                return -1;                                            // RETURN
            }
        }

        if (Tokenizer::e_END_OBJECT != d_tokenizer.tokenType()) {
            d_logStream << "Could not decode choice, "
                        << "missing terminator '}'\n";
            return -1;                                                // RETURN
        }

        --d_currentDepth;
    }
    return 0;
}

template <class TYPE>
int Decoder::decodeImp(TYPE *value, int, bdlat_TypeCategory::Enumeration)
{
    enum { k_MIN_ENUM_STRING_LENGTH = 2 };

    if (Tokenizer::e_ELEMENT_VALUE != d_tokenizer.tokenType()) {
        d_logStream << "Enumeration element value was not found\n";
        return -1;                                                    // RETURN
    }

    bslstl::StringRef dataValue;
    int rc = d_tokenizer.value(&dataValue);
    if (rc) {
        d_logStream << "Error reading enumeration value\n";
        return -1;                                                    // RETURN
    }

    if (dataValue.length() >= k_MIN_ENUM_STRING_LENGTH &&
        '"' == dataValue.front() && '"' == dataValue.back()) {
        const int                                 kBufSize = 128;
        bdlma::LocalSequentialAllocator<kBufSize> bufferAllocator;
        bsl::string                               tmpString(&bufferAllocator);

        rc = baljsn::ParserUtil::getValue(&tmpString, dataValue);
        if (rc) {
            d_logStream << "Error reading enumeration value\n";
            return -1;                                                // RETURN
        }

        rc = bdlat::EnumUtil::fromStringOrFallbackIfEnabled(
                value, tmpString.data(), static_cast<int>(tmpString.size()));
        if (rc) {
            d_logStream << "Could not decode Enum String, value not allowed \""
                        << dataValue << "\"\n";
        }

        return rc;                                                    // RETURN
    }

    // We also accept an unquoted integer (DRQS 166048981).
    int intValue;
    rc = ParserUtil::getValue(&intValue, dataValue);
    if (rc) {
        d_logStream << "Error reading enumeration value\n";
        return -1;                                                    // RETURN
    }

    rc = bdlat::EnumUtil::fromIntOrFallbackIfEnabled(value, intValue);
    if (rc) {
        d_logStream << "Could not decode int Enum, value " << intValue
                    << " not allowed\n";
    }

    return rc;
}

template <class TYPE>
int Decoder::decodeImp(TYPE *value, int, bdlat_TypeCategory::CustomizedType)
{
    if (Tokenizer::e_ELEMENT_VALUE != d_tokenizer.tokenType()) {
        d_logStream << "Customized element value was not found\n";
        return -1;                                                    // RETURN
    }

    bslstl::StringRef dataValue;
    int rc = d_tokenizer.value(&dataValue);
    if (rc) {
        d_logStream << "Error reading customized type value\n";
        return -1;                                                    // RETURN
    }

    CustomizedManipulator baseManipulator = {d_logStream, dataValue, false};
    rc = bdlat_CustomizedTypeFunctions::createBaseAndConvert(value,
                                                             baseManipulator);

    if (rc && baseManipulator.d_convertCalled) {
        d_logStream << "Could not convert base type to customized type, "
                    << "base value disallowed: \"";
        bdlb::PrintMethods::print(d_logStream, dataValue, 0, -1);
        d_logStream << "\"\n";
    }
    return rc;
}

template <class TYPE>
int Decoder::decodeImp(TYPE *value, int, bdlat_TypeCategory::Simple)
{
    if (Tokenizer::e_ELEMENT_VALUE != d_tokenizer.tokenType()) {
        d_logStream << "Simple element value was not found\n";
        return -1;                                                    // RETURN
    }

    bslstl::StringRef dataValue;
    int rc = d_tokenizer.value(&dataValue);
    if (rc) {
        d_logStream << "Error reading simple value\n";
        return -1;                                                    // RETURN
    }

    rc = ParserUtil::getValue(value, dataValue);
    // For integral types, we also accept a quoted representation
    // (DRQS 166048981).
    if (bsl::is_integral<TYPE>::value && 0 != rc &&
        ParserUtil::stripQuotes(&dataValue)) {
        rc = ParserUtil::getValue(value, dataValue);
    }
    return rc;
}

inline
int Decoder::decodeImp(bsl::vector<char> *value,
                       int,
                       bdlat_TypeCategory::Array)
{
    // Decode the specified 'value', a 'bsl::vector<char>', from the current
    // token, which must be a single element value (not a JSON array); the
    // token text is converted by 'ParserUtil::getValue'.  Return 0 on success,
    // and a non-zero value otherwise.

    if (Tokenizer::e_ELEMENT_VALUE != d_tokenizer.tokenType()) {
        d_logStream << "Could not decode vector<char> "
                    << "expected as an element value\n";
        return -1;                                                    // RETURN
    }

    bslstl::StringRef dataValue;
    int rc = d_tokenizer.value(&dataValue);

    if (rc) {
        // This overload handles 'vector<char>', not customized types, so the
        // diagnostic names the kind of element actually being read.

        d_logStream << "Error reading vector<char> element value\n";
        return -1;                                                    // RETURN
    }

    return ParserUtil::getValue(value, dataValue);
}

template <class TYPE>
int Decoder::decodeImp(TYPE                      *value,
                       int                        mode,
                       bdlat_TypeCategory::Array)
{
    // Decode the array '*value' from a JSON array whose '[' is the current
    // token.  The array is grown by one element for each value, object, or
    // nested array encountered, and each element is decoded in place using the
    // specified 'mode'.  Return 0 on success (leaving the tokenizer on the
    // closing ']'), and a non-zero value otherwise.

    if (Tokenizer::e_START_ARRAY != d_tokenizer.tokenType()) {
        d_logStream << "Could not decode vector, missing start token: '['\n";
        return -1;                                                    // RETURN
    }

    int rc = d_tokenizer.advanceToNextToken();
    if (rc) {
        logTokenizerError("Error") << " reading array.\n";
        return rc;                                                    // RETURN
    }

    int i = 0;  // number of elements decoded so far
    while (Tokenizer::e_END_ARRAY != d_tokenizer.tokenType()) {
        if (Tokenizer::e_ELEMENT_VALUE != d_tokenizer.tokenType()
         && Tokenizer::e_START_OBJECT  != d_tokenizer.tokenType()
         && Tokenizer::e_START_ARRAY   != d_tokenizer.tokenType()) {
            d_logStream << "Erroneous token found instead of array element\n";
            return -1;                                                // RETURN
        }

        // Make room for the new element, then decode directly into it.

        ++i;
        bdlat_ArrayFunctions::resize(value, i);

        Decoder_ElementVisitor visitor = { this, mode };

        if (0 != bdlat_ArrayFunctions::manipulateElement(value,
                                                         visitor,
                                                         i - 1)) {
            d_logStream << "Error adding element '" << i - 1 << "'\n";
            return -1;                                                // RETURN
        }

        rc = d_tokenizer.advanceToNextToken();
        if (rc) {
            logTokenizerError("Error") << " reading token after value of"
                            " element '" << i - 1 << "'\n";
            return rc;                                                // RETURN
        }
    }

    // The loop above exits only when the current token is 'e_END_ARRAY', so
    // no separate check for the closing ']' is needed here.

    return 0;
}

template <class TYPE>
int Decoder::decodeImp(TYPE                              *value,
                       int                                mode,
                       bdlat_TypeCategory::NullableValue)
{
    enum { k_NULL_VALUE_LENGTH = 4 };

    if (Tokenizer::e_ELEMENT_VALUE == d_tokenizer.tokenType()) {
        bslstl::StringRef dataValue;
        const int rc = d_tokenizer.value(&dataValue);
        if (rc) {
            return rc;                                                // RETURN
        }

        if (k_NULL_VALUE_LENGTH == dataValue.length()
         && 'n'                 == dataValue[0]
         && 'u'                 == dataValue[1]
         && 'l'                 == dataValue[2]
         && 'l'                 == dataValue[3]) {
            return 0;                                                 // RETURN
        }
    }

    bdlat_NullableValueFunctions::makeValue(value);

    Decoder_ElementVisitor visitor = { this, mode };
    return bdlat_NullableValueFunctions::manipulateValue(value, visitor);
}

template <class TYPE, class ANY_CATEGORY>
inline
int Decoder::decodeImp(TYPE *, ANY_CATEGORY)
{
    // Catch-all overload for any category tag passed without a formatting
    // mode.  It is not expected to be called: the assertion below always
    // fails, and -1 is returned if the assertion handler returns.

    BSLS_ASSERT_OPT(0 == "Unreachable");

    return -1;
}

// CREATORS
inline
Decoder::Decoder(bslma::Allocator *basicAllocator)
    // Create a decoder, passing the specified 'basicAllocator' to the log
    // stream, the tokenizer, and the element-name string.  The depth counters
    // and unknown-element state start at zero/'false'; 'decode' overwrites
    // the option-derived members from the options it is given.
: d_logStream(basicAllocator)
, d_tokenizer(basicAllocator)
, d_elementName(basicAllocator)
, d_currentDepth(0)
, d_maxDepth(0)
, d_skipUnknownElements(false)
, d_numUnknownElementsSkipped(0)
, d_allowMissingRequiredAttributes(
         DecoderOptions::DEFAULT_INITIALIZER_ALLOW_MISSING_REQUIRED_ATTRIBUTES)
{
}

// MANIPULATORS
template <class TYPE>
int Decoder::decode(bsl::streambuf        *streamBuf,
                    TYPE                  *value,
                    const DecoderOptions&  options)
{
    // Decode into the specified 'value' the JSON text read from the specified
    // 'streamBuf', configured by the specified 'options'.  '*value' must be a
    // sequence, choice, or array.  Any messages logged by a previous call are
    // discarded first.  Return 0 on success, and a non-zero value (with
    // diagnostics available from 'loggedMessages') otherwise.

    BSLS_ASSERT(streamBuf);
    BSLS_ASSERT(value);

    d_logStream.clear();
    d_logStream.str("");

    // Reset all per-call state.  In particular, 'd_currentDepth' must be
    // reset here: the error paths in 'decodeImp' return without undoing their
    // depth increment, so a failed decode would otherwise leave a stale depth
    // that makes later calls on this object hit the depth limit early.

    d_currentDepth              = 0;
    d_maxDepth                  = options.maxDepth();
    d_skipUnknownElements       = options.skipUnknownElements();
    d_numUnknownElementsSkipped = 0;

    d_allowMissingRequiredAttributes =
                                      options.allowMissingRequiredAttributes();

    bdlat_TypeCategory::Value category =
                                bdlat_TypeCategoryFunctions::select(*value);

    if (bdlat_TypeCategory::e_SEQUENCE_CATEGORY != category
     && bdlat_TypeCategory::e_CHOICE_CATEGORY   != category
     && bdlat_TypeCategory::e_ARRAY_CATEGORY    != category) {
        d_logStream << "The object being decoded must be a Sequence, "
                    << "Choice, or Array type\n";
        return -1;                                                    // RETURN
    }

    d_tokenizer.reset(streamBuf);
    d_tokenizer
    .setAllowStandAloneValues(false)
    .setAllowHeterogenousArrays(true) // needed for nillable arrays
    .setAllowNonUtf8StringLiterals(!options.validateInputIsUtf8())
    .setAllowConsecutiveSeparators( options.allowConsecutiveSeparators())
    .setAllowFormFeedAsWhitespace(  options.allowFormFeedAsWhitespace())
    .setAllowTrailingTopLevelComma(true)
    .setAllowUnescapedControlCharacters(
                                    options.allowUnescapedControlCharacters());

    typedef typename bdlat_TypeCategory::Select<TYPE>::Type TypeCategory;

    int rc = d_tokenizer.advanceToNextToken();
    if (rc) {
        logTokenizerError("Error") << " advancing to the first token. "
                             "Expecting a '{' or '[' as the first character\n";
        return rc;                                                    // RETURN
    }

    // Only reset '*value' once the first token has been read successfully.

    bdlat_ValueTypeFunctions::reset(value);

    rc = decodeImp(value, 0, TypeCategory());

    d_tokenizer.resetStreamBufGetPointer();

    return rc;
}

template <class TYPE>
int Decoder::decode(bsl::streambuf        *streamBuf,
                    TYPE                  *value,
                    const DecoderOptions  *options)
{
    // Forward to the reference-taking overload, substituting
    // default-constructed options if the specified 'options' is null.

    const DecoderOptions  defaultOptions;
    const DecoderOptions& effectiveOptions = options ? *options
                                                     : defaultOptions;

    return decode(streamBuf, value, effectiveOptions);
}

template <class TYPE>
int Decoder::decode(bsl::istream&          stream,
                    TYPE                  *value,
                    const DecoderOptions&  options)
{
    // Decode into the specified 'value' the JSON text read from the specified
    // 'stream', configured by the specified 'options'.  If 'stream' is not
    // 'good()' on entry, nothing is read and -1 is returned.  If decoding
    // fails, 'failbit' is set on 'stream' and -1 is returned.  Return 0 on
    // success.

    if (!stream.good()) {
        // The 'streambuf' overload, which normally clears the log, is not
        // reached on this path; clear it here so that messages from an
        // earlier 'decode' call are not mixed with this diagnostic.

        d_logStream.clear();
        d_logStream.str("");

        d_logStream
            << "Input stream state is not 'good()' ["
            << (stream.bad()  ? " 'bad()'"  : "")
            << (stream.fail() ? " 'fail()'" : "")
            << (stream.eof()  ? " 'eof()'"  : "")
            << " ] - nothing to decode\n";
        return -1;                                                    // RETURN
    }

    if (0 != decode(stream.rdbuf(), value, options)) {
        stream.setstate(bsl::ios_base::failbit);
        return -1;                                                    // RETURN
    }

    return 0;
}

template <class TYPE>
int Decoder::decode(bsl::istream&          stream,
                    TYPE                  *value,
                    const DecoderOptions  *options)
{
    // Forward to the reference-taking overload, substituting
    // default-constructed options if the specified 'options' is null.

    const DecoderOptions  defaultOptions;
    const DecoderOptions& effectiveOptions = options ? *options
                                                     : defaultOptions;

    return decode(stream, value, effectiveOptions);
}

template <class TYPE>
int Decoder::decode(bsl::streambuf *streamBuf, TYPE *value)
{
    // Decode using default-constructed options.

    return decode(streamBuf, value, DecoderOptions());
}

template <class TYPE>
int Decoder::decode(bsl::istream& stream, TYPE *value)
{
    // Decode using default-constructed options.

    return decode(stream, value, DecoderOptions());
}

template <class TYPE>
inline
int Decoder::decodeAny(bsl::streambuf        *streamBuf,
                       TYPE                  *value,
                       const DecoderOptions&  options)
{
    // Wrap '*value' in a 'bdlar::AnyRef' and forward to the overload that
    // decodes through that reference.

    bdlar::AnyRef valueRef = bdlar::RefUtil::makeAnyRef(*value);

    return decodeAny(streamBuf, &valueRef, options);
}

template <class TYPE>
inline
int Decoder::decodeAny(bsl::streambuf        *streamBuf,
                       TYPE                  *value,
                       const DecoderOptions  *options)
{
    // Forward to the reference-taking overload, substituting
    // default-constructed options if the specified 'options' is null.

    if (options) {
        return decodeAny(streamBuf, value, *options);                 // RETURN
    }

    return decodeAny(streamBuf, value, DecoderOptions());
}

template <class TYPE>
inline
int Decoder::decodeAny(bsl::istream&          stream,
                       TYPE                  *value,
                       const DecoderOptions&  options)
{
    // Wrap '*value' in a 'bdlar::AnyRef' and forward to the overload that
    // decodes through that reference.

    bdlar::AnyRef valueRef = bdlar::RefUtil::makeAnyRef(*value);

    return decodeAny(stream, &valueRef, options);
}

template <class TYPE>
inline
int Decoder::decodeAny(bsl::istream&          stream,
                       TYPE                  *value,
                       const DecoderOptions  *options)
{
    // Forward to the reference-taking overload, substituting
    // default-constructed options if the specified 'options' is null.

    if (options) {
        return decodeAny(stream, value, *options);                    // RETURN
    }

    return decodeAny(stream, value, DecoderOptions());
}

// ACCESSORS
inline
bsl::string Decoder::loggedMessages() const
{
    // Return, by value, the text currently accumulated in this decoder's log
    // stream.

    return d_logStream.str();
}

inline
int Decoder::numUnknownElementsSkipped() const
{
    // Return the current unknown-element counter, which 'decode' resets to 0
    // at the start of each 'streambuf'-based call.

    return d_numUnknownElementsSkipped;
}

                       // -----------------------------
                       // struct Decoder_ElementVisitor
                       // -----------------------------

template <class TYPE>
inline
int Decoder_ElementVisitor::operator()(TYPE *value)
{
    // Decode '*value' through the decoder's 'decodeImp' overload selected by
    // the 'bdlat' category of 'TYPE', using this visitor's stored mode.

    typedef typename bdlat_TypeCategory::Select<TYPE>::Type Category;

    const Category category = Category();

    return d_decoder_p->decodeImp(value, d_mode, category);
}

template <class TYPE, class INFO>
inline
int Decoder_ElementVisitor::operator()(TYPE *value, const INFO& info)
{
    // Decode '*value' through the decoder's 'decodeImp' overload selected by
    // the 'bdlat' category of 'TYPE', using the formatting mode reported by
    // the specified 'info' rather than this visitor's stored mode.

    typedef typename bdlat_TypeCategory::Select<TYPE>::Type Category;

    const int formattingMode = info.formattingMode();

    return d_decoder_p->decodeImp(value, formattingMode, Category());
}

                       // -----------------------------
                       // struct Decoder_DecodeImpProxy
                       // -----------------------------

// MANIPULATORS
template <class TYPE>
inline
int Decoder_DecodeImpProxy::operator()(TYPE *, bslmf::Nil)
{
    // Overload for the 'bslmf::Nil' category.  It is not expected to be
    // called: the assertion below always fails, and -1 is returned if the
    // assertion handler returns.

    BSLS_ASSERT_OPT(0 == "Unreachable");

    return -1;
}

template <class TYPE, class ANY_CATEGORY>
inline
int Decoder_DecodeImpProxy::operator()(TYPE         *object,
                                       ANY_CATEGORY  category)
{
    // Forward the specified 'object' and 'category' tag, together with this
    // proxy's stored mode, to the decoder's 'decodeImp', and return its
    // result.

    return d_decoder_p->decodeImp(object, d_mode, category);
}

                       // -----------------------------------
                       // struct Decoder_RequiredAttrsVisitor
                       // -----------------------------------

// MANIPULATORS
template <class TYPE, class INFO>
inline
int Decoder_RequiredAttrsVisitor::operator()(TYPE *value, const INFO& info)
{
    // Record the name of the attribute described by the specified 'info' in
    // the required-attribute set unless it is treated as optional.  Nullable
    // attributes are never recorded.  If 'd_usesDefaultValueFlag' is 'false',
    // simple, enumeration, and customized-type attributes (which can have
    // default values) are not recorded, and all others are.  If the flag is
    // 'true', an attribute is recorded only if its formatting mode lacks the
    // 'e_DEFAULT_VALUE' bit.  Always return 0.

    const bdlat_TypeCategory::Value category =
                                   bdlat_TypeCategoryFunctions::select(*value);

    if (bdlat_TypeCategory::e_NULLABLE_VALUE_CATEGORY == category) {
        return 0;                                                     // RETURN
    }

    bool isRequired;

    if (d_usesDefaultValueFlag) {
        isRequired = !(info.formattingMode()
                                       & bdlat_FormattingMode::e_DEFAULT_VALUE);
    }
    else {
        // Without the flag there is no way to tell whether these categories
        // carry a default value, so they are not recorded as required.

        isRequired =
                  bdlat_TypeCategory::e_SIMPLE_CATEGORY          != category
               && bdlat_TypeCategory::e_ENUMERATION_CATEGORY     != category
               && bdlat_TypeCategory::e_CUSTOMIZED_TYPE_CATEGORY != category;
    }

    if (isRequired) {
        d_requiredAttributes_p->emplace(info.name(), info.nameLength());
    }

    return 0;
}

}  // close package namespace
}  // close enterprise namespace

#endif

// ----------------------------------------------------------------------------
// Copyright 2015 Bloomberg Finance L.P.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------- END-OF-FILE ----------------------------------
